LA Stolen Bikes - 2014 Data Analysis


In [132]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
%matplotlib inline

In [133]:
# Load the 2014 stolen-bike records; totalData is an independent copy of the
# freshly loaded frame that later cells read the original date strings from.
data = pd.read_csv("Stolen_Bikes-2014.csv")
totalData = data.copy()  # copy() is deep by default
data.head()


Out[133]:
DATE OCC Crm Cd Desc AREA NAME DR NO
0 11/07/2014 BIKE - STOLEN Hollenbeck 1
1 12/15/2014 BIKE - STOLEN Wilshire 1
2 09/17/2014 BIKE - STOLEN Southwest 1
3 01/14/2014 BIKE - STOLEN Hollywood 1
4 09/24/2014 BIKE - STOLEN Central 1

In [134]:
# Number of rows (theft records) in the loaded table.
data.shape[0]


Out[134]:
971

In [135]:
# Show the distinct values of every column except the first one (the date column).
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3 kernels.
for column_name in data.columns[1:]:
    print(np.unique(data[column_name]))


['BIKE - ATTEMPTED STOLEN' 'BIKE - STOLEN']
['77th Street' 'Central' 'Devonshire' 'Foothill' 'Harbor' 'Hollenbeck'
 'Hollywood' 'Mission' 'N Hollywood' 'Newton' 'Northeast' 'Olympic'
 'Pacific' 'Rampart' 'Southeast' 'Southwest' 'Topanga' 'Van Nuys' 'West LA'
 'West Valley' 'Wilshire']
[1 2 3 4]

It is unclear what the "DR NO" column represents (its only values are 1, 2, 3 and 4), so it is not used in the analysis below.


In [136]:
# Tally thefts per area; kept as a (sorted unique area names, counts) pair.
area_names, area_counts = np.unique(data['AREA NAME'], return_counts=True)
count_by_area = (area_names, area_counts)
count_by_area


Out[136]:
(array(['77th Street', 'Central', 'Devonshire', 'Foothill', 'Harbor',
        'Hollenbeck', 'Hollywood', 'Mission', 'N Hollywood', 'Newton',
        'Northeast', 'Olympic', 'Pacific', 'Rampart', 'Southeast',
        'Southwest', 'Topanga', 'Van Nuys', 'West LA', 'West Valley',
        'Wilshire'], dtype=object),
 array([ 22, 180,  50,  28,  28,  43,  77,   7,  43,  10, 119, 102,   1,
         75,   4,  62,   9,   7,   7,  25,  72]))

In [137]:
# Bar chart of the number of thefts recorded in each area.
area_labels, thefts_per_area = count_by_area
bar_positions = range(len(area_labels))

plt.figure(figsize=(15,5))
plt.bar(bar_positions, thefts_per_area)
plt.xticks(bar_positions, area_labels, rotation=70)
# Title and axis label so the figure stands on its own, like the other charts.
plt.title("Bikes stolen by area for 2014")
plt.ylabel("# of bikes stolen")
plt.grid()
plt.show()



In [138]:
# Count thefts per calendar date, then preview the first five dates and counts.
# Single-argument print(...) calls run unchanged on Python 2 and Python 3.
count_by_date = np.unique(data['DATE OCC'], return_counts=True)
print(count_by_date[0][0:5])
print(count_by_date[1][0:5])


['01/01/2014' '01/02/2014' '01/03/2014' '01/04/2014' '01/05/2014']
[1 2 2 1 1]

In [139]:
def whichDay(dt):
    """Return the weekday index of an 'MM/DD/YYYY' date string.

    Monday is 0 and Sunday is 6 (the convention of ``date.weekday()``).
    Raises ValueError if ``dt`` is not three '/'-separated integers that
    form a valid calendar date.
    """
    month, day, year = [int(x) for x in dt.split('/')]
    # Use `date`, the name this notebook imports from the datetime module; the
    # `datetime` class itself is never imported, so calling it raises NameError
    # on a fresh kernel.
    return date(year, month, day).weekday()

# Replace the date strings in data with weekday indices (Monday=0 ... Sunday=6).
# The strings are read from the untouched totalData copy rather than from data
# itself, so re-running this cell does not pass already-converted integers to
# whichDay (which would fail on .split).
data['DATE OCC'] = totalData['DATE OCC'].apply(whichDay)
data.head()


Out[139]:
DATE OCC Crm Cd Desc AREA NAME DR NO
0 4 BIKE - STOLEN Hollenbeck 1
1 0 BIKE - STOLEN Wilshire 1
2 2 BIKE - STOLEN Southwest 1
3 1 BIKE - STOLEN Hollywood 1
4 2 BIKE - STOLEN Central 1

In [140]:
# Tally thefts per weekday index; kept as a (weekday indices, counts) pair.
weekday_values, weekday_counts = np.unique(data['DATE OCC'], return_counts=True)
count_by_day_of_week = (weekday_values, weekday_counts)
count_by_day_of_week


Out[140]:
(array([0, 1, 2, 3, 4, 5, 6]), array([149, 137, 128, 141, 147, 131, 138]))

In [141]:
# Line chart of the overall theft count for each day of the week.
weekday_index, thefts_per_weekday = count_by_day_of_week
weekday_labels = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']

plt.plot(weekday_index, thefts_per_weekday, marker='x')
plt.title("Bikes stolen by day of the week for 2014")
plt.ylabel("# of bikes stolen")
plt.xticks(range(7), weekday_labels, rotation=70)
plt.ylim([0, 200])
plt.grid()
plt.show()



In [142]:
plt.figure(figsize=(15,6))

# Weekday index (Monday=0 ... Sunday=6) of every theft, computed once up front
# instead of deep-copying and re-parsing the whole table for each area.
weekday_of_theft = totalData['DATE OCC'].apply(whichDay)
weekday_axis = np.arange(7)

cityNames = np.unique(totalData['AREA NAME'])
for area_name in cityNames:  # one line per area
    in_area = totalData['AREA NAME'] == area_name
    # bincount with minlength=7 reports weekdays without any theft as 0, so
    # every area gets a complete Monday-Sunday line; np.unique would silently
    # drop those days and the line would be interpolated across them.
    # A loop-local name is used so the overall count_by_day_of_week tally
    # computed earlier in the notebook is not overwritten.
    thefts_per_weekday = np.bincount(weekday_of_theft[in_area], minlength=7)
    plt.plot(weekday_axis, thefts_per_weekday, label=area_name)

plt.xticks(range(0,7),['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'],rotation=70)
plt.title("Bikes stolen by day of the week for each city for 2014")
plt.ylabel("# of bikes stolen")
plt.legend(bbox_to_anchor=(1.2, 1.05))  # place the legend outside the axes
plt.grid()
plt.show()


This plot is SUPER messy with all 21 areas drawn on a single axis, but hopefully someone can expand on it.


In [143]:
# Preview the first five rows of totalData, which still holds the date strings.
totalData.head(n=5)


Out[143]:
DATE OCC Crm Cd Desc AREA NAME DR NO
0 11/07/2014 BIKE - STOLEN Hollenbeck 1
1 12/15/2014 BIKE - STOLEN Wilshire 1
2 09/17/2014 BIKE - STOLEN Southwest 1
3 01/14/2014 BIKE - STOLEN Hollywood 1
4 09/24/2014 BIKE - STOLEN Central 1

In [144]:
def whichMonth(dt):
    """Return the month number from an 'MM/DD/YYYY' date string.

    All three '/'-separated fields are parsed as integers, so a malformed
    string raises ValueError even though only the month is returned.
    """
    fields = tuple(int(piece) for piece in dt.split('/'))
    month, _day, _year = fields
    return month

# Build a separate frame whose 'DATE OCC' column holds the month number of each
# theft; assign() returns a new frame, so totalData itself is left untouched.
monthData = totalData.assign(**{'DATE OCC': totalData['DATE OCC'].apply(whichMonth)})
monthData.head()


Out[144]:
DATE OCC Crm Cd Desc AREA NAME DR NO
0 11 BIKE - STOLEN Hollenbeck 1
1 12 BIKE - STOLEN Wilshire 1
2 9 BIKE - STOLEN Southwest 1
3 1 BIKE - STOLEN Hollywood 1
4 9 BIKE - STOLEN Central 1

In [145]:
# Tally thefts per month number; kept as a (month numbers, counts) pair.
month_values, month_counts = np.unique(monthData['DATE OCC'], return_counts=True)
count_by_month = (month_values, month_counts)
count_by_month


Out[145]:
(array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]),
 array([ 61,  51,  67,  79,  77, 100,  90, 102,  84,  88,  90,  82]))

In [147]:
# Line chart of the theft count for each calendar month.
month_numbers, thefts_per_month = count_by_month
month_labels = ['January','February','March','April','May','June','July','August','September','October','November','December']

plt.plot(month_numbers, thefts_per_month, marker='x')
plt.title("Bikes stolen by month for the year of 2014")
plt.ylabel("# of bikes stolen")
plt.xticks(range(1, 13), month_labels, rotation=70)
plt.ylim([0, 120])
plt.grid()
plt.show()



In [ ]: